library(vegan)
speabu<-read.csv('abund.csv', header=TRUE, row.names=1)
envdata<-read.csv('env.csv',header=TRUE,row.names=1)
#first transform both datasets
speabu.tran<-data.stand(speabu[,-1],method='total',margin='row',plot=F)
envdata.tran<-data.trans(envdata,method='log',plot=F)
#determine appropriate response model (i.e. is a linear or unimodal response model appropriate for the community under investigation) CAP & RDA=linear, CCA=chi-square
#compute gradient length types (DCA), in general >4 unimodal appropritae, 2-4 probably unimodel, <2 probably linear
decorana(speabu.tran,ira=0)
#interpretation of this: DCA1 is 5.5 SDs which suggest unimodal model so use CCA
##conducting canonical correspondence analysis
#NOTES: 1. rare species should not be included, 2. consider all transformations for both Y and X matrices, 3. eliminate highly redundant explanatory variables in the x matrix
#use cca function: 1. x is the cumminuty dataset, y is the constraining or explanatory matrix, z is a conditioning matrix (explained more alte in partial CCA and RDA)
#NOTE: below, the period to the right of the ~ indicates'all' variables in the specified dataset - alternatively, could have listed each of the variables on the righthand side of the equation
spe.cca<-cca(speabu.tran~., envdata.tran)
summary(spe.cca)
#interpretation: intertia in CCA corresponds to variance.  THe proporation of total inertia captured by the constraining environmental matrix indicates the percent variance explained.  Here 31.5% of the variance is explained by the environmental matrix (1.99/6.35)*100, to examine the specific proportion explained by each ordination axis examine the cumulative variance explained by CC1, CC2 etc.  In the output lambda refers the eigenvalue, or the amount of variation.  E.g.: CCA1 explains 7.5% of the variation in the original fish community matrix and the first and 2nd CCA together explain 14.1% of variation.  
##TO TEST SIGNIFICANCE (Monte Carlo global permutation tests) - several are available
#first look at all constraints simultaneously (considering all environmental constraints at once)
anova(spe.cca)
#second we may want to look at each axis in turn - the test based on the first axis has max power against the alternative hypothesis that there is a SINGLE dominating gradient that determines the relation between species and environment.  THis is a sequential test, so the second test just partiions out the first axis and tests the residual variance, then the third test partitions out the first and second etc.
anova(spe.cca, by='axis')
#finally we may want to look at each TERM in the model, performs a second signficance test for each term (constraining variable)
anova(spe.cca,by='terms')
#ORDINATION SCORES: there are 4 types of scores to look at (we discussed in class that WA scores are the most useful)
#lets use WA scores to visualize the ordination
plot(spe.cca,choices=c(1,2), type='points', display='wa', scaling=2)
#or you can just plot site labels:
plot(spe.cca,choices=c(1,2), type='n', display='wa', scaling=2)
text(spe.cca,choices=c(1,2),labels=row.names(speabu))
#interpreting the constrained ordination
#canonical coefficients
#these are eigenvector coefficients, "they are not very useful for interpretation"
#intra-set scores
round(intrasetcor(spe.cca),5)
#inter-set scores (correlations between species-derived sample scores (WA scores)and environmental variables)
round(intersetcor(spe.cca), 5)
#biplot for explanatory variables
#biplot scores for the constraints are given in the summary() of the ordination
summary(spe.cca)
#triplot